# Indiana COVID-19 cases and deaths, normalized by county population.
from datetime import datetime, timedelta
import math
import os
import time
import json
from plotly.offline import init_notebook_mode, iplot
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter, ColumnDataSource
from bokeh.plotting import figure, output_file, show
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
# bokeh: configure its output for display inside the Jupyter notebook.
# Reference:
# https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
output_notebook()
# load data: check if we have a fresh local version (8 hours ago or newer)
# if we don't have a fresh version, pull down a remote csv
def download_data_source(local_path):
    """Download the latest us-counties CSV and save it to 'local_path'.

    The body is streamed into a temporary sibling file ('<local_path>.part')
    and moved into place only once the transfer has finished. An interrupted
    download therefore never leaves a truncated file at 'local_path' that a
    later modification-time freshness check would mistake for good data.

    Args:
        local_path: destination path of the CSV file.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.RequestException: connection failure or timeout.
        OSError: the file could not be written or moved into place.
    """
    import requests

    data_source = (
        "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
    )
    partial_path = str(local_path) + ".part"

    # (connect, read) timeouts: without them requests waits indefinitely on a
    # stalled connection. The context manager releases the connection even
    # when the body is not fully consumed.
    with requests.get(data_source, stream=True, timeout=(10, 60)) as resp:
        resp.raise_for_status()
        with open(partial_path, "wb") as openfile:
            for block in resp.iter_content(1024):
                openfile.write(block)

    # Replace the destination in one step; a leftover '.part' file from a
    # failed attempt is simply overwritten by the next download.
    os.replace(partial_path, local_path)
# Refresh the local CSV when it is missing or at least 8 hours old, then load
# it and keep only the rows for named Indiana counties.
local_path = os.path.join(".", "us-counties.csv")
if os.path.isfile(local_path):
    mtime = int(os.stat(local_path).st_mtime)
    now = int(time.time())
    age = (now - mtime) / 60 / 60  # seconds -> hours
    if age >= 8:
        print("Local Data: is stale - downloading")
        download_data_source(local_path)
else:
    print("Local Data: not found - downloading")
    download_data_source(local_path)

print("Local Data: loading from file")
# fips is read as str rather than as a number (presumably so it matches the
# string ids of the county geojson and keeps leading zeros - confirm).
df = pd.read_csv(local_path, dtype={"fips": str})
df = df[df.state == "Indiana"]  # filter to only indiana data
# .copy() makes df an independent frame, so the columns added to it later do
# not trigger pandas' SettingWithCopyWarning.
df = df[df.county != "Unknown"].copy()  # filter out unknown county
df.head()
# download geojson
def download_geojson(local_path):
    """Download the US counties geojson (keyed by FIPS) to 'local_path'.

    The body is streamed into a temporary sibling file ('<local_path>.part')
    and moved into place only once the transfer has finished, so an
    interrupted download never leaves a truncated file at 'local_path' that
    an existence check would accept as a complete download.

    Args:
        local_path: destination path of the geojson file.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.RequestException: connection failure or timeout.
        OSError: the file could not be written or moved into place.
    """
    import requests

    data_source = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    partial_path = str(local_path) + ".part"

    # (connect, read) timeouts: without them requests waits indefinitely on a
    # stalled connection. The context manager releases the connection even
    # when the body is not fully consumed.
    with requests.get(data_source, stream=True, timeout=(10, 60)) as resp:
        resp.raise_for_status()
        with open(partial_path, "wb") as openfile:
            for block in resp.iter_content(1024):
                openfile.write(block)

    # Replace the destination in one step; a leftover '.part' file from a
    # failed attempt is simply overwritten by the next download.
    os.replace(partial_path, local_path)
# Fetch the county geojson once; afterwards the cached local copy is reused.
geojson_local_path = os.path.join(".", "geojson-counties-fips.json")
if not os.path.isfile(geojson_local_path):
    print("Geojson: downloading")
    download_geojson(geojson_local_path)

with open(geojson_local_path, "r") as openfile:
    geojson = json.load(openfile)
print("Geojson: loaded data")

# Show only the first 100 characters of the pretty-printed document as a
# quick sanity check of what was loaded.
geojson_preview = json.dumps(geojson, indent=2, sort_keys=True)
print(geojson_preview[:100])
# import county population data: a JSON object keyed by county name whose
# entries carry the population under "Pop"
with open("in-county-populations--modified.json", "r") as openfile:
    in_county_data = json.load(openfile)

# Attach each row's county population. The lookup is deliberately strict: a
# county missing from the JSON raises KeyError instead of silently producing
# NaN populations.
df["population"] = df.county.map(lambda county: in_county_data[county]["Pop"])

# Cases / deaths as a percentage of the county population, computed
# column-wise instead of with a Python-level row-by-row apply.
df["cases_pop"] = df["cases"] / df["population"] * 100
df["deaths_pop"] = df["deaths"] / df["population"] * 100
df.head()
# Shared defaults for every choropleth map drawn below.
# Map centre passed as `center` to each figure (coordinates presumably chosen
# to frame Indiana - confirm).
in_center = dict(lat=39.766028, lon=-86.441278)

# Keyword arguments common to all px.choropleth_mapbox calls: counties are
# matched to geojson features through the "fips" column.
default_cloropleth_kwargs = {
    "geojson": geojson,
    "locations": "fips",
    "color_continuous_scale": "Plasma",
    "mapbox_style": "carto-positron",
    "zoom": 5.4,
    "center": in_center,
    "opacity": 0.5,
}
# Map 1: cases as a percentage of county population.
# The colour scale spans the full observed range of the column.
# NOTE(review): every row of df is plotted; if the CSV holds several rows per
# county (e.g. one per date) they all end up on the same map - confirm that
# this is intended.
cases_pop_values = df["cases_pop"].unique()
_min, _max = min(cases_pop_values), max(cases_pop_values)

fig = px.choropleth_mapbox(
    df,
    **default_cloropleth_kwargs,
    color="cases_pop",
    range_color=(_min, _max),
    labels={"cases_pop": "Percent Infected"},
    hover_data=["county", "cases"],
)
# Drop all figure margins so the map fills the output cell.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
# Map 2: absolute number of cases per county.
# The colour scale spans the full observed range of the column.
# NOTE(review): every row of df is plotted; if the CSV holds several rows per
# county (e.g. one per date) they all end up on the same map - confirm that
# this is intended.
cases_values = df["cases"].unique()
_min, _max = min(cases_values), max(cases_values)

fig = px.choropleth_mapbox(
    df,
    **default_cloropleth_kwargs,
    color="cases",
    range_color=(_min, _max),
    labels={"cases": "Number of Cases"},
    hover_data=["county", "cases"],
)
# Drop all figure margins so the map fills the output cell.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()